Analysis date: 2020-01-10
library(plyr)
library(gtools)
library(openxlsx)
library(pheatmap)
library(reshape2)
library(progress)
library(Matrix)
library(Hmisc)
library(lemon)
library(ggpubr)
library(effsize)
library(ggbeeswarm)
library(ggfortify)
library(ggpmisc)
library(ggrepel)
library(readxl)
library(DESeq2)
library(TOSTER)
library(tidyverse)
library(vsn)
library(fdrtool)
library(limma)
library(apeglm)
library(IHW)
library(Rtsne)
library(biomartr)
library(biomaRt)
library(MultiAssayExperiment)
library(PMA)
library(gplots)
library(RColorBrewer)
library(grid)
library(ConsensusClusterPlus)
library(survival)
library(survminer)
library(cowplot)
source("/Volumes/sd17b003/Sophie/Analysis/Screen_analysis/Figure_layouts.R")
load("/Volumes/sd17b003/Sophie/Analysis/CLL_Proteomics/CLL_Proteomics_final/Proteomics_Git/Robjects/CLL_Proteomics_Setup.RData")
load("/Volumes/sd17b003/Sophie/Analysis/CLL_Proteomics/CLL_Proteomics_final/Proteomics_Git/Robjects/CLL_Proteomics_ConsensusClustering.RData")
colData(multiomics_MAE)$PG <- as.factor(CCP_group5[rownames(colData(multiomics_MAE))])
colData(multiomics_MAE)$CCP6_RNA <- as.factor(CCP_group6_RNA[rownames(colData(multiomics_MAE))])
wilcox_proteins_any <- function(g, alteration, plot=FALSE, output=TRUE){
if("SNPs" %in% (experiments(multiomics_MAE[alteration,,]) %>% names()) ){
ty="SNPs_"}else if("chrom_abber" %in% (experiments(multiomics_MAE[alteration,,]) %>% names() ) ){
ty="chrom_abber_"}else if("health_record_bin" %in% (experiments(multiomics_MAE[alteration,,]) %>% names() ) ){
ty="health_record_bin_"}
dat_g <- wideFormat(multiomics_MAE[c(g , alteration),,] ) %>% as_tibble()
dat_g <- dat_g %>% mutate(alt =as.logical(get(paste0(ty, alteration), envir=as.environment(dat_g))))
if(grepl("ENSG", g)){
assay_data="RNAseq_norm_"
cap <- metadata(multiomics_MAE)$gene_symbol_mapping %>% filter(ensembl_gene_id==g) %>% .$hgnc_symbol
col_b <- "#92c5de"
col_r <- "#f4a582"
cap_y <- paste("Transcript abundance", cap)
}else{
assay_data="proteomics_"
cap <- g
col_b <- "#0571b0"
col_r <- "#ca0020"
cap_y <- paste("Protein abundance", cap)
}
wt <- dat_g %>% filter(alt==0) %>% .[,paste0(assay_data, g)] %>% unlist
mut <- dat_g %>% filter(alt==1) %>% .[,paste0(assay_data, g)] %>% unlist
alt_lab <- alteration
alt_lab <- gsub("_mutated", "", alt_lab)
wt_lab <- "wt"
mut_lab <- "mut"
if(alt_lab=="IGHV"){
wt_lab="U-CLL"
mut_lab="M-CLL"}
wx.test <- wilcox.test(wt, mut)
if(plot==TRUE){
p <- dat_g %>% filter(!is.na(alt )) %>%
ggplot(aes_string("alt", paste0(assay_data,g), fill="alt" )) +
geom_boxplot() + geom_beeswarm() +
xlab(alt_lab) +
scale_x_discrete(labels=c(wt_lab, mut_lab)) +
ylab(cap_y)+
pp_sra +
scale_fill_manual(values = c(col_b,col_r))
print(p+theme(aspect.ratio=2)+ theme(legend.position = 'none') +
stat_compare_means(method = "wilcox", label = "p.signif",
comparisons = list(c("TRUE","FALSE"))) )
print(paste("pvalue",wx.test$p.value) )
}
if(output==TRUE){return(c(g, alteration, length(wt), length(mut), wx.test$p.value))
}else if(all(plot==TRUE, output=="plot")){return(p)}
}
ZAP70_P_plot <- wilcox_proteins_any(g="ZAP70", plot = TRUE, output = "plot", alteration="IGHV_mutated")
## [1] "pvalue 7.77315882449946e-07"
ZAP70_R_plot <- wilcox_proteins_any(g="ENSG00000115085", plot = TRUE, output = "plot", alteration="IGHV_mutated")
## [1] "pvalue 2.57528421801168e-07"
wilcox_proteins_any(g="BTK", plot = TRUE, alteration="IGHV_mutated")
## [1] "pvalue 0.423423159867772"
## [1] "BTK" "IGHV_mutated" "33"
## [4] "42" "0.423423159867772"
wilcox_proteins_any(g="BTK", plot = TRUE, alteration="trisomy12")
## [1] "pvalue 0.00018643349672198"
## [1] "BTK" "trisomy12" "56"
## [4] "21" "0.00018643349672198"
ATM_P_plot <- wilcox_proteins_any(g="ATM", plot = TRUE, output = "plot", alteration="ATM")
## [1] "pvalue 0.00103072061207342"
ATM_R_plot <- wilcox_proteins_any(g="ENSG00000149311", plot = TRUE, output = "plot", alteration="ATM")
## [1] "pvalue 0.172492816496971"
TP53_P_plot <- wilcox_proteins_any(g="TP53", plot = TRUE, output = "plot", alteration="TP53")
## [1] "pvalue 0.000117833904429591"
TP53_R_plot <- wilcox_proteins_any(g="ENSG00000141510", plot = TRUE, output = "plot", alteration="TP53")
## [1] "pvalue 0.00502557822614873"
SF3B1_P_plot <- wilcox_proteins_any(g="SF3B1", plot = TRUE, alteration="SF3B1", output = "plot")
## [1] "pvalue 0.248991438694799"
wilcox_proteins_any(g="ENSG00000115524", plot = TRUE, alteration="TP53")
## [1] "pvalue 0.0844632924013222"
## [1] "ENSG00000115524" "TP53" "67"
## [4] "13" "0.0844632924013222"
XPO1_P_plot <- wilcox_proteins_any(g="XPO1", plot = TRUE, output = "plot", alteration="XPO1")
## [1] "pvalue 0.00378833710722129"
XPO1_R_plot <- wilcox_proteins_any(g="ENSG00000082898", plot = TRUE, output = "plot", alteration="XPO1")
## [1] "pvalue 0.182153110588958"
IGHM_P_plot <- wilcox_proteins_any(g="IGHM", plot = TRUE, alteration="IGHV_mutated", output = "plot")
## [1] "pvalue 0.000898156918617089"
PG5_IGHV_ZAP60IGHM_scatter <- wideFormat(multiomics_MAE[c("ZAP70", "IGHM" , "IGHV_mutated"),,], colDataCols = "PG" ) %>% as_tibble() %>%
filter(PG==5, health_record_bin_IGHV_mutated %in% c(1,0) ) %>%
mutate(IGHV=as.factor(if_else(health_record_bin_IGHV_mutated==0, "U-CLL", "M-CLL"))) %>%
ggplot(aes(proteomics_ZAP70, proteomics_IGHM, color=IGHV)) +
geom_point() + pp_sra +
scale_color_manual(values = c("#ca0020", "#0571b0")) +
xlab("Protein abundance ZAP70") +
ylab("Protein abundance IGHM")
## ExperimentList contains data.frame or DataFrame,
## potential for errors with mixed data types
PG5_IGHV_ZAP60IGHM_scatter
plot_chromosome_theme <- list(
coord_cartesian(ylim=c(-0.7,0.7)),
facet_wrap(~paste("chromosome",chromosome_name), scales = "free_x"),
ylab("log2 norm. protein abundance"),
xlab("Protein location on chromosome"),
scale_color_manual(values=c("#0571b0", "#ca0020", "grey"))
)
Chr12_P_plot <- proteomics_tbl_meta_biomart %>%
filter( !is.na(value),
chromosome_name %in% c("12")) %>%
ggplot(aes(start_position, value, group=primary)) +
geom_point(size=0.5, alpha=0.2, color="darkgrey") +
stat_smooth(geom='line', alpha=0.5, se=FALSE, aes(color=chrom_abber_trisomy12), span=0.5, method = "loess") +
plot_chromosome_theme +
pp_sra +
ggtitle("trisomy12") +
geom_rect(xmin = 0, ymin=-0.68, ymax=0.68, xmax=133275309, color="gray40", size=1.5, fill=NA)
Chr12_P_plot + theme(aspect.ratio=0.4, legend.position = 'none')
Chr11_P_plot <- proteomics_tbl_meta_biomart %>%
filter( !is.na(value),
chromosome_name %in% c("11")) %>%
ggplot(aes(start_position, value, group=primary)) +
geom_point(size=0.5, alpha=0.2, color="darkgrey") +
stat_smooth(geom='line', alpha=0.5, se=FALSE, aes(color=chrom_abber_del11q), span=0.5, method = "loess") +
plot_chromosome_theme +
pp_sra_noguides +
ggtitle("del11q")+
geom_rect(xmin = 97400000, ymin=-0.68, ymax=0.68, xmax=110600000, color="gray40", size=1.5, fill=NA)
Chr11_P_plot + theme(aspect.ratio=0.4, legend.position = 'none')
Chr13_P_plot <- proteomics_tbl_meta_biomart %>%
filter( !is.na(value),
chromosome_name %in% c("13")) %>%
ggplot(aes(start_position, value, group=primary)) +
geom_point(size=0.5, alpha=0.2, color="darkgrey") +
stat_smooth(geom='line', alpha=0.5, se=FALSE, aes(color=chrom_abber_del13q14), span=0.5, method = "loess") +
plot_chromosome_theme +
pp_sra_noguides +
ggtitle("del13q")+
geom_rect(xmin = 47000000,ymin=-0.68, ymax=0.68, xmax=51000000, color="gray40", size=1.5, fill=NA)
Chr13_P_plot + theme(aspect.ratio=0.4, legend.position = 'none')
Chr17_P_plot <- proteomics_tbl_meta_biomart %>%
filter( !is.na(value),
chromosome_name %in% c("17")) %>%
ggplot(aes(start_position, value, group=primary)) +
geom_point(size=0.5, alpha=0.2, color="darkgrey") +
stat_smooth(geom='line', alpha=0.5, se=FALSE, aes(color=chrom_abber_del17p13), span=0.5, method = "loess") +
plot_chromosome_theme +
pp_sra +
ggtitle("del17p")+
geom_rect(xmin = 0, ymin=-0.68, ymax=0.68, xmax=10800000, color="gray40", size=1.5, fill=NA)
Chr17_P_plot + theme(aspect.ratio=0.4, legend.position = 'none')
Chr8_P_plot <- proteomics_tbl_meta_biomart %>%
filter( !is.na(value),
chromosome_name %in% c("8")) %>%
ggplot(aes(start_position, value, group=primary)) +
geom_point(size=0.5, alpha=0.2, color="darkgrey") +
stat_smooth(geom='line', alpha=0.5, se=FALSE, aes(color=chrom_abber_gain8q24), span=0.5, method = "loess") +
plot_chromosome_theme +
pp_sra +
ggtitle("gain8q24")+
geom_rect(xmin = 117700001, ymin=-0.68, ymax=0.68, xmax=146364022, color="gray40", size=1.5, fill=NA)
Chr8_P_plot + theme(aspect.ratio=0.4, legend.position = 'none')
plot_chromosome_theme_RNA <- list(
coord_cartesian(ylim=c(0.75,1.25)),
facet_wrap(~paste("chromosome",chromosome_name), scales = "free_x"),
ylab("log10 norm. RNA counts"),
xlab("Gene location on chromosome"),
scale_color_manual(values=c("#92c5de", "#f4a582", "grey"))
)
RNA_biomart<- left_join(
left_join((longFormat(multiomics_MAE[,,"RNAseq_norm"] ) %>% as_tibble()),
metadata(multiomics_MAE)$gene_symbol_mapping[c(1,2,3,5:7)],
by=c("rowname"="ensembl_gene_id")),
proteomics_tbl_meta_biomart_chrab, by=c("primary"))
## ExperimentList contains data.frame or DataFrame,
## potential for errors with mixed data types
## harmonizing input:
## removing 22 colData rownames not in sampleMap 'primary'
RNA_biomart <- left_join( left_join(RNA_biomart,
proteomics_tbl_meta_biomart_SNP, by=c("primary")),
proteomics_tbl_meta_biomart_health, by=c("primary"))
# Trisomy 12
Chr12_R_plot <- RNA_biomart %>%
filter( !is.na(value),
chromosome_name %in% c("12")) %>%
ggplot(aes(start_position, value, group=primary)) +
geom_point(size=0.5, alpha=0.2, color="darkgrey") +
stat_smooth(geom='line', alpha=0.5, se=FALSE, aes(color=chrom_abber_trisomy12), span=0.5, method = "loess") +
plot_chromosome_theme_RNA +
pp_sra +
ggtitle("trisomy12") +
coord_cartesian(ylim=c(0.95,1.1)) +
geom_rect(xmin = 0, ymin=0.95, ymax=1.1, xmax=133275309, color="gray40", size=1.5, fill=NA)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
Chr12_R_plot + theme(aspect.ratio=0.4, legend.position = 'none')
# del11q
Chr11_R_plot <- RNA_biomart %>%
filter( !is.na(value),
chromosome_name %in% c("11")) %>%
ggplot(aes(start_position, value, group=primary)) +
geom_point(size=0.5, alpha=0.2, color="darkgrey") +
stat_smooth(geom='line', alpha=0.5, se=FALSE, aes(color=chrom_abber_del11q), span=0.5, method = "loess") +
plot_chromosome_theme_RNA +
pp_sra_noguides +
ggtitle("del11q")+
coord_cartesian(ylim=c(0.95,1.1)) +
geom_rect(xmin = 97400000, ymin=0.95, ymax=1.1, xmax=110600000, color="gray40", size=1.5, fill=NA)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
Chr11_R_plot + theme(aspect.ratio=0.4, legend.position = 'none')
# del13q
Chr13_R_plot <- RNA_biomart %>%
filter( !is.na(value),
chromosome_name %in% c("13")) %>%
ggplot(aes(start_position, value, group=primary)) +
geom_point(size=0.5, alpha=0.2, color="darkgrey") +
stat_smooth(geom='line', alpha=0.5, se=FALSE, aes(color=chrom_abber_del13q14), span=0.5, method = "loess") +
plot_chromosome_theme_RNA +
pp_sra_noguides +
ggtitle("del13q")+
coord_cartesian(ylim=c(0.95,1.1)) +
geom_rect(xmin = 47000000, ymin=0.95, ymax=1.1, xmax=51000000, color="gray40", size=1.5, fill=NA)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
Chr13_R_plot + theme(aspect.ratio=0.4, legend.position = 'none')
# del17p
Chr17_R_plot <- RNA_biomart %>%
filter( !is.na(value),
chromosome_name %in% c("17")) %>%
ggplot(aes(start_position, value, group=primary)) +
geom_point(size=0.5, alpha=0.2, color="darkgrey") +
stat_smooth(geom='line', alpha=0.5, se=FALSE, aes(color=chrom_abber_del17p13), span=0.5, method = "loess") +
plot_chromosome_theme_RNA +
pp_sra +
ggtitle("del17p")+
coord_cartesian(ylim=c(0.95,1.1)) +
geom_rect(xmin = 0, ymin=0.95, ymax=1.1, xmax=10800000, color="gray40", size=1.5, fill=NA)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
Chr17_R_plot + theme(aspect.ratio=0.4, legend.position = 'none')
Chr8_R_plot <- RNA_biomart %>%
filter( !is.na(value),
chromosome_name %in% c("8")) %>%
ggplot(aes(start_position, value, group=primary)) +
geom_point(size=0.5, alpha=0.2, color="darkgrey") +
stat_smooth(geom='line', alpha=0.5, se=FALSE, aes(color=chrom_abber_gain8q24), span=0.5, method = "loess") +
plot_chromosome_theme_RNA +
pp_sra +
ggtitle("gain8q24")+
coord_cartesian(ylim=c(0.95,1.1)) +
geom_rect(xmin = 117700001, ymin=0.95, ymax=1.1, xmax=146364022, color="gray40", size=1.5, fill=NA)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
Chr8_R_plot + theme(aspect.ratio=0.4, legend.position = 'none')
sessionInfo()
## R version 3.5.2 (2018-12-20)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Mojave 10.14.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] grid parallel stats4 stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] cowplot_1.0.0 survminer_0.4.6
## [3] ConsensusClusterPlus_1.46.0 RColorBrewer_1.1-2
## [5] gplots_3.0.1.1 PMA_1.1
## [7] MultiAssayExperiment_1.8.3 biomaRt_2.38.0
## [9] biomartr_0.9.0 Rtsne_0.15
## [11] IHW_1.10.1 apeglm_1.4.2
## [13] limma_3.38.3 fdrtool_1.2.15
## [15] vsn_3.50.0 forcats_0.4.0
## [17] stringr_1.4.0 dplyr_0.8.3
## [19] purrr_0.3.3 readr_1.3.1
## [21] tidyr_1.0.0 tibble_2.1.3
## [23] tidyverse_1.3.0 TOSTER_0.3.4
## [25] DESeq2_1.22.2 SummarizedExperiment_1.12.0
## [27] DelayedArray_0.8.0 BiocParallel_1.16.6
## [29] matrixStats_0.55.0 Biobase_2.42.0
## [31] GenomicRanges_1.34.0 GenomeInfoDb_1.18.2
## [33] IRanges_2.16.0 S4Vectors_0.20.1
## [35] BiocGenerics_0.28.0 readxl_1.3.1
## [37] ggrepel_0.8.1 ggpmisc_0.3.3
## [39] ggfortify_0.4.8 ggbeeswarm_0.6.0
## [41] effsize_0.7.6 ggpubr_0.2.4
## [43] magrittr_1.5 lemon_0.4.3
## [45] Hmisc_4.3-0 ggplot2_3.2.1
## [47] Formula_1.2-3 survival_3.1-8
## [49] lattice_0.20-38 Matrix_1.2-18
## [51] progress_1.2.2 reshape2_1.4.3
## [53] pheatmap_1.0.12 openxlsx_4.1.4
## [55] gtools_3.8.1 plyr_1.8.4
##
## loaded via a namespace (and not attached):
## [1] tidyselect_0.2.5 RSQLite_2.1.4 AnnotationDbi_1.44.0
## [4] htmlwidgets_1.5.1 munsell_0.5.0 codetools_0.2-16
## [7] preprocessCore_1.44.0 withr_2.1.2 colorspace_1.4-1
## [10] knitr_1.26 rstudioapi_0.10 ggsignif_0.6.0
## [13] labeling_0.3 slam_0.1-46 bbmle_1.0.20
## [16] GenomeInfoDbData_1.2.0 lpsymphony_1.10.0 KMsurv_0.1-5
## [19] bit64_0.9-7 farver_2.0.1 coda_0.19-3
## [22] vctrs_0.2.0 generics_0.0.2 xfun_0.11
## [25] R6_2.4.1 locfit_1.5-9.1 bitops_1.0-6
## [28] assertthat_0.2.1 scales_1.1.0 nnet_7.3-12
## [31] beeswarm_0.2.3 gtable_0.3.0 affy_1.60.0
## [34] rlang_0.4.2 zeallot_0.1.0 genefilter_1.64.0
## [37] splines_3.5.2 lazyeval_0.2.2 acepack_1.4.1
## [40] impute_1.56.0 broom_0.5.2 checkmate_1.9.4
## [43] BiocManager_1.30.10 yaml_2.2.0 modelr_0.1.5
## [46] backports_1.1.5 tools_3.5.2 affyio_1.52.0
## [49] ellipsis_0.3.0 Rcpp_1.0.3 base64enc_0.1-3
## [52] zlibbioc_1.28.0 RCurl_1.95-4.12 prettyunits_1.0.2
## [55] rpart_4.1-15 zoo_1.8-6 haven_2.2.0
## [58] cluster_2.1.0 fs_1.3.1 data.table_1.12.8
## [61] reprex_0.3.0 hms_0.5.2 evaluate_0.14
## [64] xtable_1.8-4 XML_3.98-1.20 emdbook_1.3.11
## [67] gridExtra_2.3 compiler_3.5.2 KernSmooth_2.23-16
## [70] crayon_1.3.4 htmltools_0.4.0 geneplotter_1.60.0
## [73] lubridate_1.7.4 DBI_1.0.0 dbplyr_1.4.2
## [76] MASS_7.3-51.4 cli_2.0.0 gdata_2.18.0
## [79] pkgconfig_2.0.3 km.ci_0.5-2 numDeriv_2016.8-1.1
## [82] foreign_0.8-72 xml2_1.2.2 annotate_1.60.1
## [85] vipor_0.4.5 XVector_0.22.0 rvest_0.3.5
## [88] digest_0.6.23 Biostrings_2.50.2 rmarkdown_1.18
## [91] cellranger_1.1.0 survMisc_0.5.5 htmlTable_1.13.3
## [94] curl_4.3 lifecycle_0.1.0 nlme_3.1-142
## [97] jsonlite_1.6 fansi_0.4.0 pillar_1.4.2
## [100] httr_1.4.1 glue_1.3.1 zip_2.0.4
## [103] bit_1.1-14 stringi_1.4.3 blob_1.2.0
## [106] latticeExtra_0.6-28 caTools_1.17.1.3 memoise_1.1.0